import json
import os
from pathlib import Path

from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.components.coder.CoSTEER.evaluators import (
    CoSTEEREvaluator,
    CoSTEERSingleFeedback,
)
from rdagent.core.evolving_framework import QueriedKnowledge
from rdagent.core.experiment import FBWorkspace, Task
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.agent.tpl import T
from rdagent.utils.agent.workflow import build_cls_from_json_with_retry
from rdagent.utils.env import DockerEnv, DSDockerConf, MLEBDockerConf
from rdagent.utils.fmt import shrink_text

# Directory containing this module; used to locate bundled resources
# (e.g. the eval_tests templates referenced by the evaluator below).
DIRNAME = Path(__file__).absolute().resolve().parent

# Feedback type produced by the data-science CoSTEER evaluator; currently a
# plain alias of the generic single-feedback class (no extra fields yet).
DSCoSTEEREvalFeedback = CoSTEERSingleFeedback


class DSCoSTEERCoSTEEREvaluator(CoSTEEREvaluator):
    """Evaluate a data-science workspace end-to-end and produce LLM feedback.

    Pipeline:
      1. run the workspace's ``main.py`` under ``coverage`` in a Docker env;
      2. verify ``scores.csv`` and ``submission.csv`` were generated;
      3. validate the submission format via the MLEBench checker;
      4. delete workspace scripts the coverage run never executed;
      5. ask the LLM to turn the collected stdout into structured feedback.
    """

    def evaluate(
        self,
        target_task: Task,
        implementation: FBWorkspace,
        gt_implementation: FBWorkspace,
        queried_knowledge: QueriedKnowledge = None,
        **kwargs,
    ) -> DSCoSTEEREvalFeedback:
        """Run the full evaluation for ``implementation``.

        Args:
            target_task: Task whose description is shown to the LLM judge.
            implementation: Workspace under evaluation; executed in Docker.
            gt_implementation: Unused here; kept for interface compatibility.
            queried_knowledge: Unused here; kept for interface compatibility.

        Returns:
            A ``DSCoSTEEREvalFeedback`` built from the LLM's JSON response.
        """
        ds_docker_conf = DSDockerConf()
        ds_docker_conf.extra_volumes = {f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": "/kaggle/input"}
        ds_docker_conf.running_timeout_period = DS_RD_SETTING.full_timeout

        de = DockerEnv(conf=ds_docker_conf)

        # Remove submission/scores files left over from a previous workflow run.
        # The output of this cleanup command is intentionally discarded.
        implementation.execute(env=de, entry="rm submission.csv scores.csv")

        # Execute the workflow under coverage so unused scripts can be detected later.
        stdout = implementation.execute(env=de, entry="coverage run main.py")

        stdout += self._check_scores(implementation)
        stdout += self._check_submission(implementation)
        self._remove_unused_files(implementation, de)

        system_prompt = T(".prompts:DSCoSTEER_eval.system").r(
            scenario=self.scen.get_scenario_all_desc(),
            task_desc=target_task.get_task_information(),
        )
        user_prompt = T(".prompts:DSCoSTEER_eval.user").r(
            code=implementation.all_codes,
            stdout=shrink_text(stdout),
        )

        return build_cls_from_json_with_retry(
            DSCoSTEEREvalFeedback, system_prompt=system_prompt, user_prompt=user_prompt
        )

    def _check_scores(self, implementation: FBWorkspace) -> str:
        """Return a one-line report on whether ``scores.csv`` was generated."""
        score_fp = implementation.workspace_path / "scores.csv"
        if not score_fp.exists():
            return "\n Metrics file (scores.csv) is not generated!"
        return "\n Metrics file (scores.csv) is generated."

    def _check_submission(self, implementation: FBWorkspace) -> str:
        """Check ``submission.csv`` exists and, if so, validate it with MLEBench.

        Returns the text to append to the evaluation stdout.
        """
        submission_fp = implementation.workspace_path / "submission.csv"
        if not submission_fp.exists():
            return "\n Submission file (submission.csv) is not generated!"

        # Dedicated DockerEnv for MLEBench submission-format validation.
        mle_de_conf = MLEBDockerConf()
        mle_de_conf.extra_volumes = {
            f"{DS_RD_SETTING.local_data_path}/zip_files": "/mle/data",
        }
        mde = DockerEnv(conf=mle_de_conf)
        mde.prepare()
        # Inject the MLEBench checker script, specialized to this competition.
        mle_check_code = (
            (DIRNAME / "eval_tests" / "mle_submission_format_test.txt")
            .read_text()
            .replace("<competition_id>", self.scen.competition)
        )
        implementation.inject_files(**{"mle_submission_format_test.py": mle_check_code})
        return "\n MLEBench submission check:" + implementation.execute(
            env=mde, entry="python mle_submission_format_test.py"
        )

    def _remove_unused_files(self, implementation: FBWorkspace, de: DockerEnv) -> None:
        """Delete workspace ``.py`` files that the coverage run never executed.

        Test scripts (``*test.py``) and the injected checker scripts are kept.
        No-op when the coverage report cannot be produced.
        """
        implementation.execute(env=de, entry="coverage json -o coverage.json")
        coverage_fp = implementation.workspace_path / "coverage.json"
        if not coverage_fp.exists():
            return
        with open(coverage_fp) as f:
            # Files executed during the run, plus the checkers we inject ourselves.
            used_files = set(json.load(f)["files"].keys()) | {
                "submission_format_test.py",
                "mle_submission_format_test.py",
            }
        logger.info("All used scripts: {}".format(used_files))
        all_python_files = set(implementation.workspace_path.rglob("*.py"))
        unused_files = [
            py_file
            for py_file in all_python_files
            if not (py_file.name in used_files or py_file.name.endswith("test.py"))
        ]
        if unused_files:
            logger.warning(f"Unused scripts: {unused_files}")
            implementation.inject_files(**{file_path.name: implementation.DEL_KEY for file_path in unused_files})
        os.remove(coverage_fp)
